library(tidyverse)
Warning messages:
1: In system("timedatectl", intern = TRUE) :
running command 'timedatectl' had status 1
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
setup for GO term analysis
The LMM was run by Natsuhiko, see /nfs/team205/nk5/Team205/ed6/DE/run.R
ct_df <- read_tsv(list.files('/nfs/team205/nk5/Team205/ed6/DE/', pattern = "^cell", full.names = TRUE), col_names = FALSE) %>%
rename(ID=X1, celltype=X2)
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
X1 = [32mcol_double()[39m,
X2 = [31mcol_character()[39m
)
Table provided by Natsuhiko
de_genes <- read_csv("~/mount/gdrive/Pan_fetal/significant_genes/LMM_Natsuhiko_results/de_lymphoid_ltsr0.9.csv")
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
celltype = [31mcol_character()[39m,
`organ (in which the gene is up/down-regulated)` = [31mcol_character()[39m,
gene = [31mcol_character()[39m,
`ltsr (posterior prob of DE)` = [32mcol_double()[39m,
logFoldChange = [32mcol_double()[39m,
BM = [32mcol_double()[39m,
GU = [32mcol_double()[39m,
KI = [32mcol_double()[39m,
LI = [32mcol_double()[39m,
MLN = [32mcol_double()[39m,
SK = [32mcol_double()[39m,
SP = [32mcol_double()[39m,
TH = [32mcol_double()[39m,
YS = [32mcol_double()[39m
)
colnames(de_genes) <- str_remove(colnames(de_genes), " .+")
de_genes
Save number of cells x celltype and organ
n_cells <- read_csv('/nfs/team205/nk5/Team205/ed6/metadata.csv.gz') %>%
group_by(anno_lvl_2, organ) %>%
summarise(n_cells=n()) %>%
rename(celltype=anno_lvl_2)
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
index = [31mcol_character()[39m,
Sample = [31mcol_character()[39m,
donor = [31mcol_character()[39m,
organ = [31mcol_character()[39m,
anno_lvl_2 = [31mcol_character()[39m,
age = [32mcol_double()[39m,
method = [31mcol_character()[39m
)
`summarise()` has grouped output by 'anno_lvl_2'. You can override using the `.groups` argument.
n_cells %>%
filter(str_detect(celltype, "ELP"))
organs <- unique(de_genes$organ)
de_genes %>%
# filter(celltype=="MATURE_B" & organ=="YS") %>%
# pivot_longer(cols=organs, names_to="organ_expr", values_to="expr") %>%
mutate(rank=rank(ltsr)) %>%
ggplot(aes(rank, ltsr)) +
geom_point()
How many genes are DE in multiple celltypes per organ?
de_genes %>%
distinct(organ, celltype, gene) %>%
group_by(organ, gene) %>%
summarise(n=n()) %>%
ggplot(aes(n)) + geom_histogram() +
facet_wrap(organ~., scales="free")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.
How many genes are DE in multiple organs per celltype?
de_genes %>%
distinct(organ, celltype, gene) %>%
group_by(celltype, gene) %>%
summarise(n=n()) %>%
ggplot(aes(n)) + geom_histogram()
`summarise()` has grouped output by 'celltype'. You can override using the `.groups` argument.
de_genes %>%
group_by(organ, celltype) %>%
summarise(n_de_genes = n()) %>%
ggplot(aes(organ, celltype, fill=n_de_genes)) +
geom_tile() +
scale_fill_viridis_c()
de_genes %>%
group_by(organ, celltype) %>%
summarise(n_de_genes = n()) %>%
ggplot(aes(celltype, n_de_genes)) +
geom_col() +
coord_flip() +
theme_bw(base_size = 14) +
facet_wrap(organ~., scales="free_x")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.
de_genes %>%
group_by(organ, celltype) %>%
summarise(n_de_genes = n()) %>%
ggplot(aes(celltype, n_de_genes, fill=organ)) +
geom_col() +
coord_flip() +
theme_bw(base_size = 14) +
scale_color_brewer(palette="Spectral") +
scale_fill_brewer(palette="Spectral")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.
de_genes %>%
group_by(organ, celltype) %>%
summarise(n_de_genes = n()) %>%
left_join(n_cells) %>%
ggplot(aes(log10(n_cells), n_de_genes, color=organ)) +
geom_point() +
scale_color_brewer(palette="Spectral")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.
Joining, by = c("organ", "celltype")
Overlap between celltype specific genes
for (o in organs){
p_df <- de_genes %>%
group_by(gene, organ) %>%
mutate(n_celltype=n()) %>%
ungroup() %>%
filter(organ == o) %>%
filter(n_celltype > 3) %>%
# filter(abs(logFoldChange) > 0.1) %>%
rename(org_expr = o) %>%
arrange(- org_expr) %>%
mutate(gene=factor(gene, levels=unique(gene))) %>%
left_join(ct_class_df) %>%
mutate(celltype=factor(celltype, levels=unlist(ct_order)))
n_genes <- length(unique(p_df$gene))
pl_height <- ifelse(n_genes > 10, 0.3*n_genes, 1*n_genes)
p <- p_df %>%
ggplot(aes(celltype, gene)) +
facet_grid(organ~class, scales="free_x", space="free") +
geom_point(aes(fill=org_expr, size=abs(org_expr)), shape=21, color='black') +
scale_fill_gradient2(high="red", low="blue") +
scale_color_gradient2(high="red", low="blue") +
theme_gray(base_size=14) +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
print(p)
}
Joining, by = "celltype"
Error: Faceting variables must have at least one value
[90mRun `rlang::last_error()` to see where the error occurred.[39m
o = "TH"
p_df <- de_genes %>%
group_by(gene, organ) %>%
mutate(n_celltype=n()) %>%
ungroup() %>%
filter(organ == o) %>%
filter(n_celltype > 5) %>%
# filter(abs(logFoldChange) > 0.1) %>%
rename(org_expr = o) %>%
arrange(- org_expr) %>%
mutate(gene=factor(gene, levels=unique(gene))) %>%
left_join(ct_class_df) %>%
mutate(celltype=factor(celltype, levels=unlist(ct_order)))
Joining, by = "celltype"
n_genes <- length(unique(p_df$gene))
p_top <- n_cells[n_cells$organ==o,] %>%
mutate(organ=factor(organ, levels=organs)) %>%
filter(celltype %in% p_df$celltype) %>%
left_join(ct_class_df) %>%
mutate(celltype=factor(celltype, levels=unlist(ct_order))) %>%
ggplot(aes(celltype, log10(n_cells))) +
geom_col() +
theme_bw(base_size=14) +
facet_grid(organ~class, scales="free_x", space="free") +
theme(axis.text.x=element_blank())
Joining, by = "celltype"
p <- p_df %>%
ggplot(aes(celltype, gene)) +
facet_grid(organ~class, scales="free_x", space="free") +
geom_point(aes(fill=org_expr, size=abs(org_expr)), shape=21, color='black') +
scale_fill_gradient2(high="red", low="blue") +
scale_color_gradient2(high="red", low="blue") +
theme_gray(base_size=14) +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
(p_top / p) + plot_layout(heights=c(1,5))
NA
NA
plot_celltype_org_de(de_genes, org = "BM", ct="ELP/PRE_PRO_B", minFC = 0.5)
plot_celltype_org_de(de_genes, org = "LI", ct="ELP/PRE_PRO_B", minFC = 0.5)
plot_celltype_org_de(de_genes, org = "TH", ct="Progenitor", minFC = 1)
plot_celltype_org_de(de_genes, org = "TH", ct="DN(P)/DN(early)")
plot_celltype_org_de(de_genes, org = "TH", ct="B1")
plot_celltype_org_de(de_genes, org = "LI", ct="B1")
plot_celltype_org_de(de_genes, org = "SP", ct="B1")
plot_celltype_org_de(de_genes, org = "BM", ct="B1")
plot_celltype_org_de(de_genes, org = "SP", ct="NKT")
plot_celltype_org_de(de_genes, org = "SP", ct="NK")
de_genes %>%
filter(celltype=="CD8aa") %>%
pivot_longer(cols = organs, names_to="expr_organs", values_to="expr") %>%
group_by(organ, expr_organs) %>%
summarise(signature=mean(abs(expr))) %>%
ggplot(aes(expr_organs, signature, color=organ)) + geom_point()
plot_celltype_org_de(de_genes, "MATURE_B", "YS") +
coord_fixed()
Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(organs)` instead of `organs` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
plot_celltype_org_de(de_genes, "CD4+T", "MLN", minFC = 0.3) +
coord_fixed() +
plot_celltype_org_de(de_genes, "CD8+T", "MLN", minFC = 0.3) +
coord_fixed()
plot_celltype_org_de(de_genes, "CD4+T", "SK", minFC = 0.2) +
coord_fixed() +
plot_celltype_org_de(de_genes, "CD4+T", "GU", minFC = 0.2) +
coord_fixed() +
plot_celltype_org_de(de_genes, "CD4+T", "KI", minFC = 0.2) +
coord_fixed()
plot_celltype_org_de(de_genes, "CD8+T", "SP", minFC = 0.2) +
coord_fixed()
plot_celltype_org_de(de_genes, "MATURE_B", "TH", minFC = 0.8) +
coord_fixed()
dim(de_organ$beta)
[1] 12517 9
org <- "LI"
ct <- "MATURE_B"
ct_id <- ct_df$ID[ct_df$celltype==ct]
de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
signif_genes <- de_genes$gene[de_genes$organ==org & de_genes$celltype==ct]
data.frame(ct=de_organ$beta[,org], ct_org = de_ct_org$beta[,glue("{ct}:{org}")], gene=rownames(de_celltype$beta)) %>%
mutate(is_signif = gene %in% signif_genes) %>%
ggplot(aes(ct, ct_org)) +
geom_point() +
geom_point(data= . %>% filter(is_signif), color="red") +
ggrepel::geom_text_repel(data= . %>% filter(is_signif), color="red", aes(label=gene)) +
xlab(org) + ylab(glue("{ct}:{org}"))
VS marker genes
org1 <- "BM"
org2 <- "LI"
ct <- "PRO_B"
ct_id <- ct_df$ID[ct_df$celltype==ct]
de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
signif_genes <- de_genes$gene[de_genes$organ==org & de_genes$celltype==ct]
data.frame(ct=de_ct_org$beta[,glue("{ct}:{org1}")], ct_org = de_ct_org$beta[,glue("{ct}:{org2}")], gene=rownames(de_celltype$beta)) %>%
mutate(is_signif = gene %in% signif_genes) %>%
ggplot(aes(ct, ct_org)) +
geom_point() +
geom_point(data= . %>% filter(is_signif), color="red") +
geom_text(data= . %>% filter(is_signif), color="red", aes(label=gene)) +
xlab(glue("{ct}:{org1}")) + ylab(glue("{ct}:{org2}"))
library(Hmisc)
for (ct in ct_df$celltype){
ct_id <- ct_df$ID[ct_df$celltype==ct]
de_ct_org <- readRDS(glue('/nfs/team205/nk5/Team205/ed6/DE/de_{ct_id}.RDS'))
cormat <- rcorr(de_ct_org$beta)
cormat$r[cormat$P > 0.01] <- 0
corrplot::corrplot(cormat$r,method = 'color' , diag = TRUE, addCoef.col = "grey", hclust.method = "ward")
}
de_genes %>%
group_by(organ, celltype) %>%
summarise(n_de_genes = n()) %>%
ggplot(aes(organ, n_de_genes)) +
geom_col() +
coord_flip() +
theme_bw(base_size = 14) +
facet_wrap(celltype~., scales="free_x")
`summarise()` has grouped output by 'organ'. You can override using the `.groups` argument.
i.e. cells where the gene set is expressed
cor(sig_mat[1:100,1:10])
sig_score_TH_Progenitor sig_score_TH_abT(entry) sig_score_BM_B1 sig_score_GU_B1 sig_score_LI_B1 sig_score_SP_B1
sig_score_TH_Progenitor 1.00000000 0.2607735 -0.03180426 -0.049933182 -0.38547715 -0.142414491
sig_score_TH_abT(entry) 0.26077353 1.0000000 0.12534175 -0.179732663 -0.24388387 0.147321331
sig_score_BM_B1 -0.03180426 0.1253418 1.00000000 -0.503860150 -0.08516963 0.035257081
sig_score_GU_B1 -0.04993318 -0.1797327 -0.50386015 1.000000000 0.29912966 -0.003256735
sig_score_LI_B1 -0.38547715 -0.2438839 -0.08516963 0.299129659 1.00000000 0.325463255
sig_score_SP_B1 -0.14241449 0.1473213 0.03525708 -0.003256735 0.32546326 1.000000000
sig_score_TH_B1 -0.03911572 -0.1111063 -0.64250985 0.783402416 0.18909408 -0.003337539
sig_score_BM_CD4+T 0.12708686 -0.2959110 -0.21383857 0.270087050 0.30057736 -0.082727222
sig_score_LI_CD4+T -0.32082456 -0.3293925 -0.45615206 0.656951830 0.54229874 0.222532475
sig_score_MLN_CD4+T -0.21302739 -0.3044921 -0.18037049 0.048376133 0.06461616 0.165906852
sig_score_TH_B1 sig_score_BM_CD4+T sig_score_LI_CD4+T sig_score_MLN_CD4+T
sig_score_TH_Progenitor -0.039115717 0.12708686 -0.3208246 -0.21302739
sig_score_TH_abT(entry) -0.111106349 -0.29591101 -0.3293925 -0.30449214
sig_score_BM_B1 -0.642509845 -0.21383857 -0.4561521 -0.18037049
sig_score_GU_B1 0.783402416 0.27008705 0.6569518 0.04837613
sig_score_LI_B1 0.189094076 0.30057736 0.5422987 0.06461616
sig_score_SP_B1 -0.003337539 -0.08272722 0.2225325 0.16590685
sig_score_TH_B1 1.000000000 0.09536250 0.6250180 0.03827978
sig_score_BM_CD4+T 0.095362501 1.00000000 0.1790338 0.07268378
sig_score_LI_CD4+T 0.625018029 0.17903375 1.0000000 0.17857203
sig_score_MLN_CD4+T 0.038279779 0.07268378 0.1785720 1.00000000
long_cor <- left_join(long_cor_sig, long_cor_beta)
Joining, by = c("Var1", "Var2")